# load packages 
library(tidyverse)
library(miceadds)

#### Figure 1 ####

# import data
tab <- read.csv("figure_1_data.csv")

# rename, relevel variables
tab <- tab |> mutate(stage = fct_relevel(stage, "Not written", 
                                  "Written but not published", 
                                  "Submitted", "Published"),
                     sig = case_match(sig,
                                      "Null" ~ "Null Results",
                                      "Significant" ~ "Significant Results",
                                      "Difference" ~ "File Drawer Bias (Significant - Nonsignificant Results)"),
                     sig = fct_relevel(sig, "Null Results", "Significant Results",
                                       "File Drawer Bias (Significant - Nonsignificant Results)"),
                     researcher = fct_relevel(researcher, "Franco",
                                              "TESS", "Persevered"))

# produce table
tab |> filter(stage != "Written but not submitted",
              stage != "Submitted") |>
      mutate(stage = case_match(stage,
            "Written but not published" ~ "Written but \nnot published", 
            .default = stage),
            stage = fct_relevel(stage, "Not written", 
                                "Written but \nnot published"),
            sig = case_match(sig,
            "File Drawer Bias (Significant - Nonsignificant Results)" ~ 
                  "File Drawer Bias \n(Significant - Insignificant Results)",
            "Null Results" ~ "Insignificant Results",
            .default = sig
            ),
            sig = fct_relevel(sig, "Insignificant Results", "Significant Results"),
            researcher = case_match(researcher,
                                    "Franco" ~ "Franco et al. \n(2014)",
                                    .default = researcher),
            researcher = fct_relevel(researcher, "Franco et al. \n(2014)", "TESS")) |>
      ggplot(aes(stage, percent)) +
      geom_col(aes(fill = factor(researcher)), 
               position = position_dodge(.9)) +
      geom_text(aes(group = researcher, 
                    label = paste0(round(percent, 0), "%"), 
                    vjust = ifelse(percent < 0, 1.25, -.4)),
                position = position_dodge2(.9), size = 3) +
      scale_y_continuous(labels = scales::label_percent(scale = 1)) + 
      scale_fill_manual(values = RColorBrewer::brewer.pal(3, "Set2"), 
                        name = "Researcher \nGroup") +
      labs(y = "Percent", x = "Project Outcome") +
      theme_bw() +
      theme(strip.text = element_text(size = 12), 
            axis.text.x.bottom = element_text(size = 10), 
            strip.text.x = element_text(face = "bold")) +
      facet_grid(~sig)

#### Read in survey data ####
d <- read.csv("TESS_applicants.csv")

#### Raw counts for Table 1 ####
# Each command creates two tables.
# The first shows counts of TESSers (accepted == 1). 
# The second is for perseverering researchers (accepted == 0). 

# first row Table 1
# Counts are in the first rows
table(d$write_up, d$sig_find_strict, d$accepted)

# second row of Table 1
# # Counts are in the second rows
table(d$write_up, d$sig_find_strict, d$accepted)

# third row of Table 1
# Counts are in the first rows 
table(d$published, d$sig_find_strict, d$accepted)

# Fourth row of Table 1
# Counts are in the second rows 
table(d$published, d$sig_find_strict, d$accepted)

#### In-Text Analyses ####

## file drawer bias (sig vs null)
# TESSers 
prop.test(matrix(c(46, 61-46, 21, 46-21), ncol = 2, byrow = T), correct = F)

# perseverers
prop.test(matrix(c(98, 171-98, 35, 97-35), ncol = 2, byrow = T), correct = F)

## sig vs null - submitted but not published
# TESSers
prop.test(matrix(c(10, 51, 5, 41), ncol = 2, byrow = T), correct = F)

# perseverers
prop.test(matrix(c(38, 171-38, 9, 97-9), ncol = 2, byrow = T), correct = F)

## sig vs null - not written
# TESSers
prop.test(matrix(c(5, 61-5, 13, 46-13), ncol = 2, byrow = T), correct = F)

# perseverers
prop.test(matrix(c(23, 171-23, 40, 97-40), ncol = 2, byrow = T), correct = F)

# difference in differences of proportions
( ( (5/61) - (13/46) ) - ( (23/171) - (40/97) ) ) / 
    sqrt( ( (5/61) * (1-(5/61)) / 61) +  # p1
      ( (13/46) * (1-(13/46)) / 46) + # p2
      ( (23/171) * (1-(23/171)) / 171) + # q1
      ( (40/97) * (1-(40/97)) / 97) )# q2

# p-value 
1 - pnorm(0.8223093)

# 95% CI
p <- ( (5/61) - (13/46) ) - ( (23/171) - (40/97) )
se <- sqrt( ( (5/61) * (1-(5/61)) / 61) +  # p1
            ( (13/46) * (1-(13/46)) / 46) + # p2
            ( (23/171) * (1-(23/171)) / 171) + # q1
            ( (40/97) * (1-(40/97)) / 97) )# q2
p - 1.96*se
p + 1.96*se

## file drawer biases of ours vs. Franco et al. 2014
# TESSers
matrix(c(46, 61-46, 21, 46-21), ncol = 2, byrow = T) # Ours

56/91 - 10/48 # Franco et al. 2014, from Table 2

# difference in differences of proportions z-test
( ( (56/91) - (10/48) ) - ( (46/61) - (21/46) ) ) / 
      sqrt( ( (56/91) * (1-(56/91)) / 91) +  # p1
                  ( (10/48) * (1-(10/48)) / 48) + # p2
                  ( (46/61) * (1-(46/61)) / 61) + # q1
                  ( (21/46) * (1-(21/46)) / 46) )# q2

# 95% CI
p <- ( ( (56/91) - (10/48) ) - ( (46/61) - (21/46) ) )
se <- sqrt( ( (56/91) * (1-(56/91)) / 91) +  # p1
                  ( (10/48) * (1-(10/48)) / 48) + # p2
                  ( (46/61) * (1-(46/61)) / 61) + # q1
                  ( (21/46) * (1-(21/46)) / 46) )# q2
p - 1.96*se
p + 1.96*se

# p-value
(1 - pnorm(0.9100609)) *2

# perseverers
prop.test(matrix(c(98, 171-98, 35, 97-35), ncol = 2, byrow = T), correct = F)

# difference in differences of proportions z-test
( ( (56/91) - (10/48) ) - ( (98/171) - (35/97) ) ) / 
      sqrt( ( (56/91) * (1-(56/91)) / 91) +  # p1
                  ( (10/48) * (1-(10/48)) / 48) + # p2
                  ( (98/171) * (1-(98/171)) / 171) + # q1
                  ( (35/97) * (1-(35/97)) / 97) ) # q2

# p-value
(1 - pnorm(1.963002)) *2

# 95% CI
p <- ( ( (56/91) - (10/48) ) - ( (98/171) - (35/97) ) ) 
se <- sqrt( ( (56/91) * (1-(56/91)) / 91) +  # p1
                  ( (10/48) * (1-(10/48)) / 48) + # p2
                  ( (98/171) * (1-(98/171)) / 171) + # q1
                  ( (35/97) * (1-(35/97)) / 97) ) # q2
p - 1.96*se
p + 1.96*se

## pooled 

p <- ( ( (56/91) - (10/48) ) - ( (46+98)/(232) - (21+35)/(143) ) )  
se <- sqrt( ( (56/91) * (1-(56/91)) / 91) +  # p1
                  ( (10/48) * (1-(10/48)) / 48) + # p2
                  ( (46+98)/(232) * (1-(46+98)/(232)) / 232) + # q1
                  ( (21+35)/(143) * (1-(21+35)/(143)) / 143) ) # q2

p - 1.96*se
p + 1.96*se

# p-value
(1 - pnorm(1.906079)) *2

## not write up - Franco vs Us
prop.test(matrix(c(31, 48-31, 13, 46-13), ncol = 2, byrow = T), correct = F)


## file drawer bias (top journals only)
# TESSers
with(subset(d, accepted == 1), 
            table(sig_find_strict, published, top_journal))

prop.test(matrix(c(38, 7, 12, 8), ncol = 2, byrow = T), correct = F)

# perseverers
with(subset(d, accepted == 0), 
     table(sig_find_strict, published, top_journal))

prop.test(matrix(c(55, 42, 20, 15), ncol = 2, byrow = T), correct = F)


#### Supplementary Materials ####

##### DV = writing up #####
## TESSers
sigwuA <- glm.cluster(d, write_up ~ factor(sig_find_strict) +
                            age + female_or_nb + nonwhite +
                            parents_edu3 + children_have + 
                            risk_prone +
                            money_research_budget + int_grant_opp +
                            research_time + students_advised +
                            dept_confers_phd + coauthor_have + 
                            tenure + pubs_articles + pubs_books, 
                      cluster = "id", subset = d$scenario3 == "2accepted",
                      family = "binomial")
summary(sigwuA)

## persevering researchers
sigwuP <- glm.cluster(d, write_up ~ factor(sig_find_strict) +
                            age + female_or_nb + nonwhite +
                            parents_edu3 + children_have + 
                            risk_prone +
                            money_research_budget + int_grant_opp +
                            research_time + students_advised +
                            dept_confers_phd + coauthor_have + 
                            tenure + pubs_articles + pubs_books, 
                      cluster = "id", subset = d$scenario3 == "1declined, but pursued",
                      family = "binomial")
summary(sigwuP)

## interaction model
sigwuI <- glm.cluster(d, write_up ~ factor(sig_find_strict)*scenario3 +
                            age + female_or_nb + nonwhite +
                            parents_edu3 + children_have + 
                            risk_prone +
                            money_research_budget + int_grant_opp +
                            research_time + students_advised +
                            dept_confers_phd + coauthor_have + 
                            tenure + pubs_articles + pubs_books, 
                      cluster = "id",
                      family = "binomial")
summary(sigwuI)


##### DV = submitting to a journal #####
## TESSers
sigwuA <- glm.cluster(d, submit_journal ~ factor(sig_find_strict) +
                            age + female_or_nb + race2 +
                            parents_edu3 + children_have + 
                            risk_prone +
                            money_research_budget + int_grant_opp +
                            research_time + students_advised +
                            dept_confers_phd + coauthor_have + 
                            tenure + pubs_articles + pubs_books, 
                      cluster = "id", subset = d$scenario3 == "2accepted",
                      family = "binomial")
summary(sigwuA)

## persevering researchers
sigwuP <- glm.cluster(d, submit_journal ~ factor(sig_find_strict) +
                            age + female_or_nb + nonwhite +
                            parents_edu3 + children_have + 
                            risk_prone +
                            money_research_budget + int_grant_opp +
                            research_time + students_advised +
                            dept_confers_phd + coauthor_have + 
                            tenure + pubs_articles + pubs_books, 
                      cluster = "id", subset = d$scenario3 == "1declined, but pursued",
                      family = "binomial")
summary(sigwuP)

## interaction model
sigwuI <- glm.cluster(d, submit_journal ~ factor(sig_find_strict)*scenario3 +
                            age + female_or_nb + race2 +
                            parents_edu3 + children_have + 
                            risk_prone +
                            money_research_budget + int_grant_opp +
                            research_time + students_advised +
                            dept_confers_phd + coauthor_have + 
                            tenure + pubs_articles + pubs_books, 
                      cluster = "id",
                      family = "binomial")
summary(sigwuI)


##### DV = publishing #####
## TESSers
sigwuA <- glm.cluster(d, published ~ factor(sig_find_strict) +
                            age + female_or_nb + race2 +
                            parents_edu3 + children_have + 
                            risk_prone +
                            money_research_budget + int_grant_opp +
                            research_time + students_advised +
                            dept_confers_phd + coauthor_have + 
                            tenure + pubs_articles + pubs_books, 
                      cluster = "id", subset = d$scenario3 == "2accepted",
                      family = "binomial")
summary(sigwuA)

## persevering researchers
sigwuP <- glm.cluster(d, published ~ factor(sig_find_strict) +
                            age + female_or_nb + race2 +
                            parents_edu3 + children_have + 
                            risk_prone +
                            money_research_budget + int_grant_opp +
                            research_time + students_advised +
                            dept_confers_phd + coauthor_have + 
                            tenure + pubs_articles + pubs_books, 
                      cluster = "id", subset = d$scenario3 == "1declined, but pursued",
                      family = "binomial")
summary(sigwuP)

## interaction model
sigwuI <- glm.cluster(d, published ~ factor(sig_find_strict)*scenario3 +
                            age + female_or_nb + race2 +
                            parents_edu3 + children_have + 
                            risk_prone +
                            money_research_budget + int_grant_opp +
                            research_time + students_advised +
                            dept_confers_phd + coauthor_have + 
                            tenure + pubs_articles + pubs_books, 
                      cluster = "id",
                      family = "binomial")
summary(sigwuI)

